## Load data

# set directory (use correct path for your computer)
# setwd("/Users/...")

states = c("ak","al","ar","az","ca","co","ct","dc","de","fl","ga","hi","ia","id","il","ind",
"ks","ky","la","ma","md","me","mi","mn","mo","ms","mt","nc","ne","nh","nj","nm","nv","ny",
"oh","ok","or","pa","ri","sc","sd","tn","tx","ut","vr","vt","wa","wi","wv","wy")
search = read.csv("Street et al 2014 search.csv", header=TRUE)
regis_all = read.csv("Street et al 2014 registrations.csv",header=TRUE)
deadlines = read.csv("Street et al 2014 deadlines.csv",header=TRUE)
	
###############################################################################
## Prepare for modelling
## Deadlines
ip_dl = rbind(ifelse(deadlines$ip_dl<950,deadlines$ip_dl-900,
ifelse(deadlines$ip_dl<1050,30+deadlines$ip_dl-1000,61+deadlines$ip_dl-1100)))
colnames(ip_dl) = states
mail_dl = rbind(ifelse(deadlines$mail_dl<950,deadlines$mail_dl-900,
ifelse(deadlines$mail_dl<1050,30+deadlines$mail_dl-1000,61+deadlines$mail_dl-1100)))
colnames(mail_dl) = states
online_dl = rbind(ifelse(deadlines$online_dl<950,deadlines$online_dl-900,
ifelse(deadlines$online_dl<1050,30+deadlines$online_dl-1000,61+deadlines$online_dl-1100)))
colnames(online_dl) = states
e_dl = rbind(deadlines$edr)
colnames(e_dl) = states

## Create indicators for deadlines
ip_dead = sapply(states,function(x) c(rep(0,ip_dl[,x]-1),1,rep(0,67-ip_dl[,x])))
m_dead = sapply(states,function(x) c(rep(0,mail_dl[,x]-1),1,rep(0,67-mail_dl[,x])))
online_dl[is.na(online_dl)==T] <- 1
o_dead = sapply(states,function(x) c(rep(0,online_dl[,x]-1),1,rep(0,67-online_dl[,x])))
o_dead[1,] <- 0
e_dead = sapply(states, function(x) rep(0,67))
e_dead[67, c(1, 8, 13, 14, 22, 24, 27, 30, 39, 48, 50)] <- 1

## Registration counts
# set to NA post-deadline, then vectorize
regis = sapply(states,function(x) c(regis_all[c(1:ip_dl[,x]),x],rep(NA,67-ip_dl[,x])))
# But put some registration numbers back in, i.e. numbers on election-day in few states
# election-day number for alaska
regis[67,1] <- regis_all$ak[67]
# election-day number for idaho
regis[67,14] <- regis_all$id[67]
# cut out period when registration was closed in NC
regis[43:47,28] <- NA
# election-day number for rhode island
regis[67,39] <- regis_all$ri[67]
# election-day number for wyoming
regis[67,50] <- regis_all$wy[67]

## Prepare search data
#srch = search[,-1]
srch = log(search[,-1]-min(search[,-1])+.01) - log(0.01)
# Vectorize search data matrix (first column is dates, so remove it)
#sz = as.vector(unlist(srch[,-1]))
s = as.vector(unlist(search[,-1]))
# Here is a transformation of the search data to pull in the large values
sz = log(s-min(s)+.01) - log(0.01)

## Create factor for state names
st = factor(rep(states,each=67))

#######################################################################
Subset of data for estimation
s.w.d = c("ak","ar","ca","de","fl","id","me","mi","nc","nj","nv","ny","oh","ri","wa","wy")

#y = regis[,s.w.d]
y = regis
we = c(rep(c(1,1,0,0,0,0,0),9),c(1,1,0,0))
#di = ip_dead[,s.w.d]
di = ip_dead
#dm = m_dead[,s.w.d]
dm = m_dead
#do = o_dead[,s.w.d]
do = o_dead
#de = e_dead[,s.w.d]
de = e_dead
#cov = srch[,s.w.d]
cov = srch
n = dim(cov)[1]
m = dim(cov)[2]

#Day to move deadline to:
move.dl = 67
dl = as.vector(ip_dl)
edr = as.vector(e_dl)

############################################################################
# Below is code to calculate the design matrix for the penalized spline term
J = 15
knots<-quantile(unique(sz),seq(0,1,length=(J+2))[-c(1,(J+2))])
x = array(NA,c(n,m,J+1))
for(s in 1:m){
 Z_K<-t(sapply(cov[,s],function(x) abs(x-knots)^3-abs(knots)^3))
 OMEGA_all<-(abs(outer(knots,knots,"-")))^3
 svd.OMEGA_all<-svd(OMEGA_all)
 sqrt.OMEGA_all<-t(svd.OMEGA_all$v%*%(t(svd.OMEGA_all$u)*sqrt(svd.OMEGA_all$d)))
 x[,s,]<-cbind(cov[,s],t(solve(sqrt.OMEGA_all,t(Z_K))))
}

## NOTE: we used JAGS version 3.3.0 
library(R2jags)

############################################################################
## Model Files
#Poisson-Gamma Linear Search Effect/Independent Errors
write("model{
	for(s in 1:m){
		for(t in 1:n){
		 y[t,s] ~ dpois(lambda[t,s])
		 y.pred[t,s] ~ dpois(lambda[t,s])
		 log(mu[t,s]) <- alpha[1] + alpha[2]*we[t] + alpha[3]*di[t,s] + alpha[4]*dm[t,s]
		  + alpha[5]*do[t,s] + alpha[6]*de[t,s] + beta*cov[t,s]
		 lambda[t,s] ~ dgamma(kappa,kappa/mu[t,s])
  		}
	}	
	# Priors on main effect parameters
	for(p in 1:6){
 	 alpha[p] ~ dnorm(0,0.00001)
	}
 	 beta ~ dnorm(0,0.00001)
	# Hyperpriors on variance parameters
	 kappa ~ dunif(0.01,100)
	# Sums to Save
	for(s in 1:m){
		for(t in 1:n){
		 #Only sum before the new DL, and those after current DL in each State, and not EDR
		 new.voters[t,s] <- step(move.dl - t + 0.5)*step(t-dl[s]-0.5)*step(0.5-edr[s])*y[t,s]
		}
	 #Total Additional Registraints By State
	 Sum[s] <- sum(new.voters[,s])
	}
	 #Total Additional Registraints (in Millions)
	 Sum[m+1] <- sum(Sum[1:m])/1000000
}","PGLinMod.txt")

#Poisson-Gamma Linear Search Effect/AR1 Errors
write("model{
	for(s in 1:m){
	 y[1,s] ~ dpois(lambda[1,s])
	 y.pred[1,s] ~ dpois(lambda[1,s])
	 eta[1,s] <- alpha[1] + alpha[2]*we[1] + alpha[3]*di[1,s] + alpha[4]*dm[1,s]
	  + alpha[5]*do[1,s] + alpha[6]*de[1,s] + beta*cov[1,s]
	 log(mu[1,s]) <- eta[1,s]
	 lambda[1,s] ~ dgamma(kappa,kappa/mu[1,s])
		for(t in 2:n){
		 y[t,s] ~ dpois(lambda[t,s])
		 y.pred[t,s] ~ dpois(lambda[t,s])
		 eta[t,s] <- alpha[1] + alpha[2]*we[t] + alpha[3]*di[t,s] + alpha[4]*dm[t,s]
		  + alpha[5]*do[t,s] + alpha[6]*de[t,s] + beta*cov[t,s]
		 log(mu[t,s]) <- eta[t,s] + rho*(log(lambda[t-1,s])-eta[t-1,s])
		 lambda[t,s] ~ dgamma(kappa,kappa/mu[t,s])
  		}
	}	
	# Priors on main effect parameters
	for(p in 1:6){
 	 alpha[p] ~ dnorm(0,0.00001)
	}
 	 beta ~ dnorm(0,0.00001)
	# Hyperpriors on variance parameters
	 kappa ~ dunif(0.01,100)
	 rho ~ dunif(-0.99,0.99)
	# Sums to Save
	for(s in 1:m){
		for(t in 1:n){
		 #Only sum before the new DL , and those after current DL in each State, and not EDR
		 new.voters[t,s] <- step(move.dl - t + 0.5)*step(t-dl[s]-0.5)*step(0.5-edr[s])*y[t,s]
		}
	 #Total Additional Registraints By State
	 Sum[s] <- sum(new.voters[,s])
	}
	 #Total Additional Registraints (in Millions)
	 Sum[m+1] <- sum(Sum[1:m])/1000000
}","PGLinAR1Mod.txt")

#Poisson-Gamma Spline Search Effect/Independent Errors
write("model{
	for(s in 1:m){
		for(t in 1:n){
		 y[t,s] ~ dpois(lambda[t,s])
		 y.pred[t,s] ~ dpois(lambda[t,s])
		 log(mu[t,s]) <- alpha[1] + alpha[2]*we[t] + alpha[3]*di[t,s] + alpha[4]*dm[t,s]
		  + alpha[5]*do[t,s] + alpha[6]*de[t,s] + inprod(beta[],x[t,s,])
		 lambda[t,s] ~ dgamma(kappa,kappa/mu[t,s])
  		}
	}	
	# Priors on main effect parameters
	for(p in 1:6){
 	 alpha[p] ~ dnorm(0,0.00001)
	}
	# Prior on unpenalized component of the spline
 	 beta[1] ~ dnorm(0,0.00001)
	# Prior on penalized components of the spline
	for(j in 1:J){
	 beta[j+1] ~ dnorm(0,tau.beta)
	}
	# Hyperpriors on variance parameters
	 tau.beta <- 1/(sigma.beta*sigma.beta)
	 sigma.beta ~ dunif(0.01,100)
	 kappa ~ dunif(0.01,100)
	# Sums to Save
	for(s in 1:m){
		for(t in 1:n){
		 #Only sum before new DL, and those after current DL in each State, and not EDR
		 new.voters[t,s] <- step(move.dl - t + 0.5)*step(t-dl[s]-0.5)*step(0.5-edr[s])*y[t,s]
		}
	 #Total Additional Registraints By State
	 Sum[s] <- sum(new.voters[,s])
	}
	 #Total Additional Registraints (in Millions)
	 Sum[m+1] <- sum(Sum[1:m])/1000000
}","PGSpMod.txt")

#Poisson-Gamma Spline Search Effect/AR1 Errors
write("model{
	for(s in 1:m){
	 y[1,s] ~ dpois(lambda[1,s])
	 y.pred[1,s] ~ dpois(lambda[1,s])
	 eta[1,s] <- alpha[1] + alpha[2]*we[1] + alpha[3]*di[1,s] + alpha[4]*dm[1,s]
	  + alpha[5]*do[1,s] + alpha[6]*de[1,s] + inprod(beta[],x[1,s,])
	 log(mu[1,s]) <- eta[1,s]
	 lambda[1,s] ~ dgamma(kappa,kappa/mu[1,s])
		for(t in 2:n){
		 y[t,s] ~ dpois(lambda[t,s])
		 y.pred[t,s] ~ dpois(lambda[t,s])
		 eta[t,s] <- alpha[1] + alpha[2]*we[t] + alpha[3]*di[t,s] + alpha[4]*dm[t,s]
		  + alpha[5]*do[t,s] + alpha[6]*de[t,s] + inprod(beta[],x[t,s,])
		 log(mu[t,s]) <- eta[t,s] + rho*(log(lambda[t-1,s])-eta[t-1,s])
		 lambda[t,s] ~ dgamma(kappa,kappa/mu[t,s])
  		}
	}	
	# Priors on main effect parameters
	for(p in 1:6){
 	 alpha[p] ~ dnorm(0,0.00001)
	}
	# Prior on unpenalized component of the spline
 	 beta[1] ~ dnorm(0,0.00001)
	# Prior on penalized components of the spline
	for(j in 1:J){
	 beta[j+1] ~ dnorm(0,tau.beta)
	}
	# Hyperpriors on variance parameters
	 tau.beta <- 1/(sigma.beta*sigma.beta)
	 sigma.beta ~ dunif(0.01,100)
	 kappa ~ dunif(0.01,100)
	 rho ~ dunif(-0.99,0.99)
	# Sums to Save
	for(s in 1:m){
		for(t in 1:n){
		 #Only sum before the new DL, and those after current DL in each State, and not EDR
		 new.voters[t,s] <- step(move.dl - t + 0.5)*step(t-dl[s]-0.5)*step(0.5-edr[s])*y[t,s]
		}
	 #Total Additional Registraints By State
	 Sum[s] <- sum(new.voters[,s])
	}
	 #Total Additional Registraints (in Millions)
	 Sum[m+1] <- sum(Sum[1:m])/1000000
}","PGSpAR1Mod.txt")

############################################################################
## Fit Models
memory.limit(8000)
#Poisson-Gamma Linear Search Effect/Independent Errors
dta <- list("n", "m", "y", "cov", "we", "di", "de", "do", "dm","move.dl","dl","edr")
ints <- function(){list(alpha=rnorm(6), beta=rnorm(1), kappa=runif(1,1,2))}
pars <- c("alpha","beta","kappa","Sum","y.pred")

fit1 <- jags(data=dta,inits=ints,model.file="PGLinMod.txt",parameters=pars,
n.chains=2,n.iter=22000,n.burnin=2000,n.thin=1)

#Poisson-Gamma Linear Search Effect/AR1 Errors
dta <- list("n", "m", "y", "cov", "we", "di", "de", "do", "dm","move.dl","dl","edr")
ints <- function(){list(alpha=rnorm(6), beta=rnorm(1), kappa=runif(1,1,2), rho=runif(1,0,1))}
pars <- c("alpha","beta","kappa","rho","Sum","y.pred")

fit2 <- jags(data=dta,inits=ints,model.file="PGLinAR1Mod.txt",parameters=pars,
n.chains=2,n.iter=22000,n.burnin=2000,n.thin=1)

#Poisson-Gamma Spline Search Effect/Independent Errors
dta <- list("n", "m", "J", "y", "x", "we", "di", "de", "do", "dm","move.dl","dl","edr")
ints <- function(){list(alpha=rnorm(6), beta=rnorm(J+1,0,.1), sigma.beta=runif(1,0.01,100),
 kappa=runif(1,1,1))}
pars <- c("alpha","beta","kappa","Sum","y.pred")

fit3 <- jags(data=dta,inits=ints,model.file="PGSpMod.txt",parameters=pars,
n.chains=2,n.iter=110000,n.burnin=10000,n.thin=1)

#Poisson-Gamma Spline Search Effect/AR1 Errors
dta <- list("n", "m", "J", "y", "x", "we", "di", "de", "do", "dm","move.dl","dl","edr")
ints <- function(){list(alpha=rnorm(6), beta=rnorm(J+1,0,.1), sigma.beta=runif(1,0.01,100),
 kappa=runif(1,1,1), rho=runif(1))}
pars <- c("alpha","beta","kappa","rho","Sum","y.pred")

fit4 <- jags(data=dta,inits=ints,model.file="PGSpAR1Mod.txt",parameters=pars,
n.chains=2,n.iter=110000,n.burnin=10000,n.thin=1)

############################################################################
## Convergence Assessment
#Check whether all monitored parameters have Rhat <= 1.1 (Gelman-Rubin Diagnostic)
sum(fit1$BUGSoutput$summary[,"Rhat"]>1.1)==0
sum(fit2$BUGSoutput$summary[,"Rhat"]>1.1)==0
sum(fit3$BUGSoutput$summary[,"Rhat"]>1.1)==0
sum(fit4$BUGSoutput$summary[,"Rhat"]>1.1)==0

#Selected traceplots from fit4
params = c("alpha[1]","alpha[2]","alpha[3]","beta[1]","beta[15]","sigma.beta","kappa","rho")
par(mfrow=c(4,2),mar=c(4.2,4.2,.2,.2),mgp=c(2.8,.8,0),las=1)
for(i in 1:8){
plot(fit4$BUGSoutput$sims.array[,1,params[i]],type='l',ylab=params[i],
 xlab="MCMC Sample Iteration")
lines(fit4$BUGSoutput$sims.array[,2,params[i]],col='red')
}

############################################################################
## Model Comparison

#Predicted # of Additional Registrants by Model
add_res = rbind(quantile(fit1$BUGSoutput$sims.matrix[,"Sum[51]"],c(.05,.5,.95)),
quantile(fit2$BUGSoutput$sims.matrix[,"Sum[51]"],c(.05,.5,.95)),
quantile(fit3$BUGSoutput$sims.matrix[,"Sum[51]"],c(.05,.5,.95)),
quantile(fit4$BUGSoutput$sims.matrix[,"Sum[51]"],c(.05,.5,.95)))
rownames(add_res) = c("Linear","Lin/AR1","Spline","Sp/AR1")
print(add_res)

#DIC and pD Comparison
c(fit1$BUGSoutput$DIC,fit2$BUGSoutput$DIC,fit3$BUGSoutput$DIC,fit4$BUGSoutput$DIC)
#pD Comparison
c(fit1$BUGSoutput$pD,fit2$BUGSoutput$pD,fit3$BUGSoutput$pD,fit4$BUGSoutput$pD)

## State by state estimates in 1000s for Spline/AR1 Model (i.e. fit4)
tot.new.votes.post4 = fit4$BUGSoutput$sims.list$Sum
#Compute posterior summaries, mean, 90% CIs
add_res_by_state4 = apply(tot.new.votes.post4[,-51],2,
function(x) c(mean(x),quantile(x,c(0.05,.95)))/1000)
colnames(add_res_by_state4) = states
#Print out results, in thousands
print(add_res_by_state4)


## To get predictions from models:
#Predicted number of registrations by quantiles of Google search volume
#Registration Values to Predict
cov.new = quantile(unlist(cov),c(0.01,.1,.25,.5,.75,.9,.99))
#Spline Design Matrix
x.new <- cbind(1,cov.new,t(solve(sqrt.OMEGA_all,
 sapply(cov.new,function(x) abs(x-knots)^3 - abs(knots)^3))))

########
#Model 1
#Predicted Number of Registrants at Quantiles of Search Volume
beta.post1 <- t(cbind(fit1$BUGSoutput$sims.list$alpha[,1],fit1$BUGSoutput$sims.list$beta))
sp.est1 <- exp(apply(cbind(1,cov.new)%*%beta.post1,1,
function(x) c(mean(x),quantile(x,c(0.05,0.95)))))
print(round(sp.est1,0))
#Deadline Effects
alpha.post1 <- t(fit1$BUGSoutput$sims.matrix[,53:57])
var.est1 <- apply(exp(alpha.post1),1,function(x) c(mean(x),quantile(x,c(0.025,0.975))))
print(round(var.est1,2))

########
#Model 2
#Predicted Number of Registrants at Quantiles of Search Volume
beta.post2 <- t(cbind(fit2$BUGSoutput$sims.list$alpha[,1],fit2$BUGSoutput$sims.list$beta))
sp.est2 <- exp(apply(cbind(1,cov.new)%*%beta.post2,1,
function(x) c(mean(x),quantile(x,c(0.05,0.95)))))
print(round(sp.est2,0))
#Deadline Effects
alpha.post2 <- t(fit2$BUGSoutput$sims.matrix[,53:57])
var.est2 <- apply(exp(alpha.post2),1,function(x) c(mean(x),quantile(x,c(0.025,0.975))))
print(round(var.est2,2))

########
#Model 3
#Predicted Number of Registrants at Quantiles of Search Volume
beta.post3 <- t(cbind(fit3$BUGSoutput$sims.list$alpha[,1],fit3$BUGSoutput$sims.list$beta))
sp.est3 <- exp(apply(x.new%*%beta.post3,1,
function(x) c(mean(x),quantile(x,c(0.05,0.95)))))
print(sp.est3)
#Deadline Effects
alpha.post3 <- t(fit3$BUGSoutput$sims.matrix[,53:57])
var.est3 <- apply(exp(alpha.post3),1,function(x) c(mean(x),quantile(x,c(0.025,0.975))))
print(round(var.est3,2))

########
#Model 4
#Predicted Number of Registrants at Quantiles of Search Volume
beta.post4 <- t(cbind(fit4$BUGSoutput$sims.list$alpha[,1],fit4$BUGSoutput$sims.list$beta))
sp.est4 <- exp(apply(x.new%*%beta.post4,1,
function(x) c(mean(x),quantile(x,c(0.05,0.95)))))
print(sp.est4)
#Deadline Effects
alpha.post4 <- t(fit4$BUGSoutput$sims.matrix[,53:57])
var.est4 <- apply(exp(alpha.post4),1,
function(x) c(mean(x),quantile(x,c(0.025,0.975))))
print(round(var.est4,2))


##
## end
##